home *** CD-ROM | disk | FTP | other *** search
- Path: abcfd20.larc.nasa.gov!amiga-request
- From: amiga-request@abcfd20.larc.nasa.gov (Amiga Sources/Binaries Moderator)
- Subject: v90i232: flex 2.3 - fast lexical analyzer generator, Part05/13
- Reply-To: loftus@wpllabs.uucp (William P Loftus)
- Newsgroups: comp.sources.amiga
- Message-ID: <comp.sources.amiga:v90i232@abcfd20.larc.nasa.gov>
- Date: 19 Aug 90 22:42:32 GMT
- Approved: tadguy@uunet.UU.NET (Tad Guy)
- X-Mail-Submissions-To: amiga@uunet.uu.net
- X-Post-Discussions-To: comp.sys.amiga
-
- Submitted-by: loftus@wpllabs.uucp (William P Loftus)
- Posting-number: Volume 90, Issue 232
- Archive-name: unix/flex-2.3/part05
-
- #!/bin/sh
- # This is a shell archive. Remove anything before this line, then unpack
- # it by saving it into a file and typing "sh file". To overwrite existing
- # files, type "sh file -c". You can also feed this as standard input via
- # unshar, or by typing "sh <file", e.g.. If this archive is complete, you
- # will see the following message at the end:
- # "End of archive 5 (of 13)."
- # Contents: dfa.c flex.Doc
- # Wrapped by tadguy@abcfd20 on Sun Aug 19 18:41:44 1990
- PATH=/bin:/usr/bin:/usr/ucb ; export PATH
- if test -f 'dfa.c' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'dfa.c'\"
- else
- echo shar: Extracting \"'dfa.c'\" \(26919 characters\)
- sed "s/^X//" >'dfa.c' <<'END_OF_FILE'
- X/* dfa - DFA construction routines */
- X
- X/*-
- X * Copyright (c) 1990 The Regents of the University of California.
- X * All rights reserved.
- X *
- X * This code is derived from software contributed to Berkeley by
- X * Vern Paxson.
- X *
- X * The United States Government has rights in this work pursuant
- X * to contract no. DE-AC03-76SF00098 between the United States
- X * Department of Energy and the University of California.
- X *
- X * Redistribution and use in source and binary forms are permitted provided
- X * that: (1) source distributions retain this entire copyright notice and
- X * comment, and (2) distributions including binaries display the following
- X * acknowledgement: ``This product includes software developed by the
- X * University of California, Berkeley and its contributors'' in the
- X * documentation or other materials provided with the distribution and in
- X * all advertising materials mentioning features or use of this software.
- X * Neither the name of the University nor the names of its contributors may
- X * be used to endorse or promote products derived from this software without
- X * specific prior written permission.
- X * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
- X * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
- X * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
- X */
- X
- X#ifndef lint
- Xstatic char rcsid[] =
- X "@(#) $Header: /usr/fsys/odin/a/vern/flex/RCS/dfa.c,v 2.7 90/06/27 23:48:15 vern Exp $ (LBL)";
- X#endif
- X
- X#include "flexdef.h"
- X
- X
- X/* declare functions that have forward references */
- X
- Xvoid dump_associated_rules PROTO((FILE*, int));
- Xvoid dump_transitions PROTO((FILE*, int[]));
- Xvoid sympartition PROTO((int[], int, int[], int[]));
- Xint symfollowset PROTO((int[], int, int, int[]));
- X
- X
- X/* check_for_backtracking - check a DFA state for backtracking
- X *
- X * synopsis
- X * int ds, state[numecs];
- X * check_for_backtracking( ds, state );
- X *
- X * ds is the number of the state to check and state[] is its out-transitions,
- X * indexed by equivalence class, and state_rules[] is the set of rules
- X * associated with this state
- X */
- X
- Xvoid check_for_backtracking( ds, state )
- Xint ds;
- Xint state[];
- X
- X {
- X if ( (reject && ! dfaacc[ds].dfaacc_set) || ! dfaacc[ds].dfaacc_state )
- X { /* state is non-accepting */
- X ++num_backtracking;
- X
- X if ( backtrack_report )
- X {
- X fprintf( backtrack_file, "State #%d is non-accepting -\n", ds );
- X
- X /* identify the state */
- X dump_associated_rules( backtrack_file, ds );
- X
- X /* now identify it further using the out- and jam-transitions */
- X dump_transitions( backtrack_file, state );
- X
- X putc( '\n', backtrack_file );
- X }
- X }
- X }
- X
- X
- X/* check_trailing_context - check to see if NFA state set constitutes
- X * "dangerous" trailing context
- X *
- X * synopsis
- X * int nfa_states[num_states+1], num_states;
- X * int accset[nacc+1], nacc;
- X * check_trailing_context( nfa_states, num_states, accset, nacc );
- X *
- X * NOTES
- X * Trailing context is "dangerous" if both the head and the trailing
- X * part are of variable size \and/ there's a DFA state which contains
- X * both an accepting state for the head part of the rule and NFA states
- X * which occur after the beginning of the trailing context.
- X * When such a rule is matched, it's impossible to tell if having been
- X * in the DFA state indicates the beginning of the trailing context
- X * or further-along scanning of the pattern. In these cases, a warning
- X * message is issued.
- X *
- X * nfa_states[1 .. num_states] is the list of NFA states in the DFA.
- X * accset[1 .. nacc] is the list of accepting numbers for the DFA state.
- X */
- X
- Xvoid check_trailing_context( nfa_states, num_states, accset, nacc )
- Xint *nfa_states, num_states;
- Xint *accset;
- Xregister int nacc;
- X
- X {
- X register int i, j;
- X
- X for ( i = 1; i <= num_states; ++i )
- X {
- X int ns = nfa_states[i];
- X register int type = state_type[ns];
- X register int ar = assoc_rule[ns];
- X
- X if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE )
- X { /* do nothing */
- X }
- X
- X else if ( type == STATE_TRAILING_CONTEXT )
- X {
- X /* potential trouble. Scan set of accepting numbers for
- X * the one marking the end of the "head". We assume that
- X * this looping will be fairly cheap since it's rare that
- X * an accepting number set is large.
- X */
- X for ( j = 1; j <= nacc; ++j )
- X if ( accset[j] & YY_TRAILING_HEAD_MASK )
- X {
- X fprintf( stderr,
- X "%s: Dangerous trailing context in rule at line %d\n",
- X program_name, rule_linenum[ar] );
- X return;
- X }
- X }
- X }
- X }
- X
- X
- X/* dump_associated_rules - list the rules associated with a DFA state
- X *
- X * synopisis
- X * int ds;
- X * FILE *file;
- X * dump_associated_rules( file, ds );
- X *
- X * goes through the set of NFA states associated with the DFA and
- X * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
- X * and writes a report to the given file
- X */
- X
- Xvoid dump_associated_rules( file, ds )
- XFILE *file;
- Xint ds;
- X
- X {
- X register int i, j;
- X register int num_associated_rules = 0;
- X int rule_set[MAX_ASSOC_RULES + 1];
- X int *dset = dss[ds];
- X int size = dfasiz[ds];
- X
- X for ( i = 1; i <= size; ++i )
- X {
- X register rule_num = rule_linenum[assoc_rule[dset[i]]];
- X
- X for ( j = 1; j <= num_associated_rules; ++j )
- X if ( rule_num == rule_set[j] )
- X break;
- X
- X if ( j > num_associated_rules )
- X { /* new rule */
- X if ( num_associated_rules < MAX_ASSOC_RULES )
- X rule_set[++num_associated_rules] = rule_num;
- X }
- X }
- X
- X bubble( rule_set, num_associated_rules );
- X
- X fprintf( file, " associated rule line numbers:" );
- X
- X for ( i = 1; i <= num_associated_rules; ++i )
- X {
- X if ( i % 8 == 1 )
- X putc( '\n', file );
- X
- X fprintf( file, "\t%d", rule_set[i] );
- X }
- X
- X putc( '\n', file );
- X }
- X
- X
- X/* dump_transitions - list the transitions associated with a DFA state
- X *
- X * synopisis
- X * int state[numecs];
- X * FILE *file;
- X * dump_transitions( file, state );
- X *
- X * goes through the set of out-transitions and lists them in human-readable
- X * form (i.e., not as equivalence classes); also lists jam transitions
- X * (i.e., all those which are not out-transitions, plus EOF). The dump
- X * is done to the given file.
- X */
- X
- Xvoid dump_transitions( file, state )
- XFILE *file;
- Xint state[];
- X
- X {
- X register int i, ec;
- X int out_char_set[CSIZE];
- X
- X for ( i = 0; i < csize; ++i )
- X {
- X ec = abs( ecgroup[i] );
- X out_char_set[i] = state[ec];
- X }
- X
- X fprintf( file, " out-transitions: " );
- X
- X list_character_set( file, out_char_set );
- X
- X /* now invert the members of the set to get the jam transitions */
- X for ( i = 0; i < csize; ++i )
- X out_char_set[i] = ! out_char_set[i];
- X
- X fprintf( file, "\n jam-transitions: EOF " );
- X
- X list_character_set( file, out_char_set );
- X
- X putc( '\n', file );
- X }
- X
- X
- X/* epsclosure - construct the epsilon closure of a set of ndfa states
- X *
- X * synopsis
- X * int t[current_max_dfa_size], numstates, accset[num_rules + 1], nacc;
- X * int hashval;
- X * int *epsclosure();
- X * t = epsclosure( t, &numstates, accset, &nacc, &hashval );
- X *
- X * NOTES
- X * the epsilon closure is the set of all states reachable by an arbitrary
- X * number of epsilon transitions which themselves do not have epsilon
- X * transitions going out, unioned with the set of states which have non-null
- X * accepting numbers. t is an array of size numstates of nfa state numbers.
- X * Upon return, t holds the epsilon closure and numstates is updated. accset
- X * holds a list of the accepting numbers, and the size of accset is given
- X * by nacc. t may be subjected to reallocation if it is not large enough
- X * to hold the epsilon closure.
- X *
- X * hashval is the hash value for the dfa corresponding to the state set
- X */
- X
- Xint *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr )
- Xint *t, *ns_addr, accset[], *nacc_addr, *hv_addr;
- X
- X {
- X register int stkpos, ns, tsp;
- X int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
- X int stkend, nstate;
- X static int did_stk_init = false, *stk;
- X
- X#define MARK_STATE(state) \
- X trans1[state] = trans1[state] - MARKER_DIFFERENCE;
- X
- X#define IS_MARKED(state) (trans1[state] < 0)
- X
- X#define UNMARK_STATE(state) \
- X trans1[state] = trans1[state] + MARKER_DIFFERENCE;
- X
- X#define CHECK_ACCEPT(state) \
- X { \
- X nfaccnum = accptnum[state]; \
- X if ( nfaccnum != NIL ) \
- X accset[++nacc] = nfaccnum; \
- X }
- X
- X#define DO_REALLOCATION \
- X { \
- X current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
- X ++num_reallocs; \
- X t = reallocate_integer_array( t, current_max_dfa_size ); \
- X stk = reallocate_integer_array( stk, current_max_dfa_size ); \
- X } \
- X
- X#define PUT_ON_STACK(state) \
- X { \
- X if ( ++stkend >= current_max_dfa_size ) \
- X DO_REALLOCATION \
- X stk[stkend] = state; \
- X MARK_STATE(state) \
- X }
- X
- X#define ADD_STATE(state) \
- X { \
- X if ( ++numstates >= current_max_dfa_size ) \
- X DO_REALLOCATION \
- X t[numstates] = state; \
- X hashval = hashval + state; \
- X }
- X
- X#define STACK_STATE(state) \
- X { \
- X PUT_ON_STACK(state) \
- X CHECK_ACCEPT(state) \
- X if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
- X ADD_STATE(state) \
- X }
- X
- X if ( ! did_stk_init )
- X {
- X stk = allocate_integer_array( current_max_dfa_size );
- X did_stk_init = true;
- X }
- X
- X nacc = stkend = hashval = 0;
- X
- X for ( nstate = 1; nstate <= numstates; ++nstate )
- X {
- X ns = t[nstate];
- X
- X /* the state could be marked if we've already pushed it onto
- X * the stack
- X */
- X if ( ! IS_MARKED(ns) )
- X PUT_ON_STACK(ns)
- X
- X CHECK_ACCEPT(ns)
- X hashval = hashval + ns;
- X }
- X
- X for ( stkpos = 1; stkpos <= stkend; ++stkpos )
- X {
- X ns = stk[stkpos];
- X transsym = transchar[ns];
- X
- X if ( transsym == SYM_EPSILON )
- X {
- X tsp = trans1[ns] + MARKER_DIFFERENCE;
- X
- X if ( tsp != NO_TRANSITION )
- X {
- X if ( ! IS_MARKED(tsp) )
- X STACK_STATE(tsp)
- X
- X tsp = trans2[ns];
- X
- X if ( tsp != NO_TRANSITION )
- X if ( ! IS_MARKED(tsp) )
- X STACK_STATE(tsp)
- X }
- X }
- X }
- X
- X /* clear out "visit" markers */
- X
- X for ( stkpos = 1; stkpos <= stkend; ++stkpos )
- X {
- X if ( IS_MARKED(stk[stkpos]) )
- X {
- X UNMARK_STATE(stk[stkpos])
- X }
- X else
- X flexfatal( "consistency check failed in epsclosure()" );
- X }
- X
- X *ns_addr = numstates;
- X *hv_addr = hashval;
- X *nacc_addr = nacc;
- X
- X return ( t );
- X }
- X
- X
- X/* increase_max_dfas - increase the maximum number of DFAs */
- X
- Xvoid increase_max_dfas()
- X
- X {
- X current_max_dfas += MAX_DFAS_INCREMENT;
- X
- X ++num_reallocs;
- X
- X base = reallocate_integer_array( base, current_max_dfas );
- X def = reallocate_integer_array( def, current_max_dfas );
- X dfasiz = reallocate_integer_array( dfasiz, current_max_dfas );
- X accsiz = reallocate_integer_array( accsiz, current_max_dfas );
- X dhash = reallocate_integer_array( dhash, current_max_dfas );
- X dss = reallocate_int_ptr_array( dss, current_max_dfas );
- X dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas );
- X
- X if ( nultrans )
- X nultrans = reallocate_integer_array( nultrans, current_max_dfas );
- X }
- X
- X
- X/* ntod - convert an ndfa to a dfa
- X *
- X * synopsis
- X * ntod();
- X *
- X * creates the dfa corresponding to the ndfa we've constructed. the
- X * dfa starts out in state #1.
- X */
- X
- Xvoid ntod()
- X
- X {
- X int *accset, ds, nacc, newds;
- X int sym, hashval, numstates, dsize;
- X int num_full_table_rows; /* used only for -f */
- X int *nset, *dset;
- X int targptr, totaltrans, i, comstate, comfreq, targ;
- X int *epsclosure(), snstods(), symlist[CSIZE + 1];
- X int num_start_states;
- X int todo_head, todo_next;
- X
- X /* note that the following are indexed by *equivalence classes*
- X * and not by characters. Since equivalence classes are indexed
- X * beginning with 1, even if the scanner accepts NUL's, this
- X * means that (since every character is potentially in its own
- X * equivalence class) these arrays must have room for indices
- X * from 1 to CSIZE, so their size must be CSIZE + 1.
- X */
- X int duplist[CSIZE + 1], state[CSIZE + 1];
- X int targfreq[CSIZE + 1], targstate[CSIZE + 1];
- X
- X /* this is so find_table_space(...) will know where to start looking in
- X * chk/nxt for unused records for space to put in the state
- X */
- X if ( fullspd )
- X firstfree = 0;
- X
- X accset = allocate_integer_array( num_rules + 1 );
- X nset = allocate_integer_array( current_max_dfa_size );
- X
- X /* the "todo" queue is represented by the head, which is the DFA
- X * state currently being processed, and the "next", which is the
- X * next DFA state number available (not in use). We depend on the
- X * fact that snstods() returns DFA's \in increasing order/, and thus
- X * need only know the bounds of the dfas to be processed.
- X */
- X todo_head = todo_next = 0;
- X
- X for ( i = 0; i <= csize; ++i )
- X {
- X duplist[i] = NIL;
- X symlist[i] = false;
- X }
- X
- X for ( i = 0; i <= num_rules; ++i )
- X accset[i] = NIL;
- X
- X if ( trace )
- X {
- X dumpnfa( scset[1] );
- X fputs( "\n\nDFA Dump:\n\n", stderr );
- X }
- X
- X inittbl();
- X
- X /* check to see whether we should build a separate table for transitions
- X * on NUL characters. We don't do this for full-speed (-F) scanners,
- X * since for them we don't have a simple state number lying around with
- X * which to index the table. We also don't bother doing it for scanners
- X * unless (1) NUL is in its own equivalence class (indicated by a
- X * positive value of ecgroup[NUL]), (2) NUL's equilvalence class is
- X * the last equivalence class, and (3) the number of equivalence classes
- X * is the same as the number of characters. This latter case comes about
- X * when useecs is false or when its true but every character still
- X * manages to land in its own class (unlikely, but it's cheap to check
- X * for). If all these things are true then the character code needed
- X * to represent NUL's equivalence class for indexing the tables is
- X * going to take one more bit than the number of characters, and therefore
- X * we won't be assured of being able to fit it into a YY_CHAR variable.
- X * This rules out storing the transitions in a compressed table, since
- X * the code for interpreting them uses a YY_CHAR variable (perhaps it
- X * should just use an integer, though; this is worth pondering ... ###).
- X *
- X * Finally, for full tables, we want the number of entries in the
- X * table to be a power of two so the array references go fast (it
- X * will just take a shift to compute the major index). If encoding
- X * NUL's transitions in the table will spoil this, we give it its
- X * own table (note that this will be the case if we're not using
- X * equivalence classes).
- X */
- X
- X /* note that the test for ecgroup[0] == numecs below accomplishes
- X * both (1) and (2) above
- X */
- X if ( ! fullspd && ecgroup[0] == numecs )
- X { /* NUL is alone in its equivalence class, which is the last one */
- X int use_NUL_table = (numecs == csize);
- X
- X if ( fulltbl && ! use_NUL_table )
- X { /* we still may want to use the table if numecs is a power of 2 */
- X int power_of_two;
- X
- X for ( power_of_two = 1; power_of_two <= csize; power_of_two *= 2 )
- X if ( numecs == power_of_two )
- X {
- X use_NUL_table = true;
- X break;
- X }
- X }
- X
- X if ( use_NUL_table )
- X nultrans = allocate_integer_array( current_max_dfas );
- X /* from now on, nultrans != nil indicates that we're
- X * saving null transitions for later, separate encoding
- X */
- X }
- X
- X
- X if ( fullspd )
- X {
- X for ( i = 0; i <= numecs; ++i )
- X state[i] = 0;
- X place_state( state, 0, 0 );
- X }
- X
- X else if ( fulltbl )
- X {
- X if ( nultrans )
- X /* we won't be including NUL's transitions in the table,
- X * so build it for entries from 0 .. numecs - 1
- X */
- X num_full_table_rows = numecs;
- X
- X else
- X /* take into account the fact that we'll be including
- X * the NUL entries in the transition table. Build it
- X * from 0 .. numecs.
- X */
- X num_full_table_rows = numecs + 1;
- X
- X /* declare it "short" because it's a real long-shot that that
- X * won't be large enough.
- X */
- X printf( "static short int yy_nxt[][%d] =\n {\n",
- X /* '}' so vi doesn't get too confused */
- X num_full_table_rows );
- X
- X /* generate 0 entries for state #0 */
- X for ( i = 0; i < num_full_table_rows; ++i )
- X mk2data( 0 );
- X
- X /* force ',' and dataflush() next call to mk2data */
- X datapos = NUMDATAITEMS;
- X
- X /* force extra blank line next dataflush() */
- X dataline = NUMDATALINES;
- X }
- X
- X /* create the first states */
- X
- X num_start_states = lastsc * 2;
- X
- X for ( i = 1; i <= num_start_states; ++i )
- X {
- X numstates = 1;
- X
- X /* for each start condition, make one state for the case when
- X * we're at the beginning of the line (the '%' operator) and
- X * one for the case when we're not
- X */
- X if ( i % 2 == 1 )
- X nset[numstates] = scset[(i / 2) + 1];
- X else
- X nset[numstates] = mkbranch( scbol[i / 2], scset[i / 2] );
- X
- X nset = epsclosure( nset, &numstates, accset, &nacc, &hashval );
- X
- X if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) )
- X {
- X numas += nacc;
- X totnst += numstates;
- X ++todo_next;
- X
- X if ( variable_trailing_context_rules && nacc > 0 )
- X check_trailing_context( nset, numstates, accset, nacc );
- X }
- X }
- X
- X if ( ! fullspd )
- X {
- X if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) )
- X flexfatal( "could not create unique end-of-buffer state" );
- X
- X ++numas;
- X ++num_start_states;
- X ++todo_next;
- X }
- X
- X while ( todo_head < todo_next )
- X {
- X targptr = 0;
- X totaltrans = 0;
- X
- X for ( i = 1; i <= numecs; ++i )
- X state[i] = 0;
- X
- X ds = ++todo_head;
- X
- X dset = dss[ds];
- X dsize = dfasiz[ds];
- X
- X if ( trace )
- X fprintf( stderr, "state # %d:\n", ds );
- X
- X sympartition( dset, dsize, symlist, duplist );
- X
- X for ( sym = 1; sym <= numecs; ++sym )
- X {
- X if ( symlist[sym] )
- X {
- X symlist[sym] = 0;
- X
- X if ( duplist[sym] == NIL )
- X { /* symbol has unique out-transitions */
- X numstates = symfollowset( dset, dsize, sym, nset );
- X nset = epsclosure( nset, &numstates, accset,
- X &nacc, &hashval );
- X
- X if ( snstods( nset, numstates, accset,
- X nacc, hashval, &newds ) )
- X {
- X totnst = totnst + numstates;
- X ++todo_next;
- X numas += nacc;
- X
- X if ( variable_trailing_context_rules && nacc > 0 )
- X check_trailing_context( nset, numstates,
- X accset, nacc );
- X }
- X
- X state[sym] = newds;
- X
- X if ( trace )
- X fprintf( stderr, "\t%d\t%d\n", sym, newds );
- X
- X targfreq[++targptr] = 1;
- X targstate[targptr] = newds;
- X ++numuniq;
- X }
- X
- X else
- X {
- X /* sym's equivalence class has the same transitions
- X * as duplist(sym)'s equivalence class
- X */
- X targ = state[duplist[sym]];
- X state[sym] = targ;
- X
- X if ( trace )
- X fprintf( stderr, "\t%d\t%d\n", sym, targ );
- X
- X /* update frequency count for destination state */
- X
- X i = 0;
- X while ( targstate[++i] != targ )
- X ;
- X
- X ++targfreq[i];
- X ++numdup;
- X }
- X
- X ++totaltrans;
- X duplist[sym] = NIL;
- X }
- X }
- X
- X numsnpairs = numsnpairs + totaltrans;
- X
- X if ( caseins && ! useecs )
- X {
- X register int j;
- X
- X for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j )
- X state[i] = state[j];
- X }
- X
- X if ( ds > num_start_states )
- X check_for_backtracking( ds, state );
- X
- X if ( nultrans )
- X {
- X nultrans[ds] = state[NUL_ec];
- X state[NUL_ec] = 0; /* remove transition */
- X }
- X
- X if ( fulltbl )
- X {
- X /* supply array's 0-element */
- X if ( ds == end_of_buffer_state )
- X mk2data( -end_of_buffer_state );
- X else
- X mk2data( end_of_buffer_state );
- X
- X for ( i = 1; i < num_full_table_rows; ++i )
- X /* jams are marked by negative of state number */
- X mk2data( state[i] ? state[i] : -ds );
- X
- X /* force ',' and dataflush() next call to mk2data */
- X datapos = NUMDATAITEMS;
- X
- X /* force extra blank line next dataflush() */
- X dataline = NUMDATALINES;
- X }
- X
- X else if ( fullspd )
- X place_state( state, ds, totaltrans );
- X
- X else if ( ds == end_of_buffer_state )
- X /* special case this state to make sure it does what it's
- X * supposed to, i.e., jam on end-of-buffer
- X */
- X stack1( ds, 0, 0, JAMSTATE );
- X
- X else /* normal, compressed state */
- X {
- X /* determine which destination state is the most common, and
- X * how many transitions to it there are
- X */
- X
- X comfreq = 0;
- X comstate = 0;
- X
- X for ( i = 1; i <= targptr; ++i )
- X if ( targfreq[i] > comfreq )
- X {
- X comfreq = targfreq[i];
- X comstate = targstate[i];
- X }
- X
- X bldtbl( state, ds, totaltrans, comstate, comfreq );
- X }
- X }
- X
- X if ( fulltbl )
- X dataend();
- X
- X else if ( ! fullspd )
- X {
- X cmptmps(); /* create compressed template entries */
- X
- X /* create tables for all the states with only one out-transition */
- X while ( onesp > 0 )
- X {
- X mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp],
- X onedef[onesp] );
- X --onesp;
- X }
- X
- X mkdeftbl();
- X }
- X }
- X
- X
- X/* snstods - converts a set of ndfa states into a dfa state
- X *
- X * synopsis
- X * int sns[numstates], numstates, newds, accset[num_rules + 1], nacc, hashval;
- X * int snstods();
- X * is_new_state = snstods( sns, numstates, accset, nacc, hashval, &newds );
- X *
- X * on return, the dfa state number is in newds.
- X */
- X
- Xint snstods( sns, numstates, accset, nacc, hashval, newds_addr )
- Xint sns[], numstates, accset[], nacc, hashval, *newds_addr;
- X
- X {
- X int didsort = 0;
- X register int i, j;
- X int newds, *oldsns;
- X
- X for ( i = 1; i <= lastdfa; ++i )
- X if ( hashval == dhash[i] )
- X {
- X if ( numstates == dfasiz[i] )
- X {
- X oldsns = dss[i];
- X
- X if ( ! didsort )
- X {
- X /* we sort the states in sns so we can compare it to
- X * oldsns quickly. we use bubble because there probably
- X * aren't very many states
- X */
- X bubble( sns, numstates );
- X didsort = 1;
- X }
- X
- X for ( j = 1; j <= numstates; ++j )
- X if ( sns[j] != oldsns[j] )
- X break;
- X
- X if ( j > numstates )
- X {
- X ++dfaeql;
- X *newds_addr = i;
- X return ( 0 );
- X }
- X
- X ++hshcol;
- X }
- X
- X else
- X ++hshsave;
- X }
- X
- X /* make a new dfa */
- X
- X if ( ++lastdfa >= current_max_dfas )
- X increase_max_dfas();
- X
- X newds = lastdfa;
- X
- X dss[newds] = (int *) malloc( (unsigned) ((numstates + 1) * sizeof( int )) );
- X
- X if ( ! dss[newds] )
- X flexfatal( "dynamic memory failure in snstods()" );
- X
- X /* if we haven't already sorted the states in sns, we do so now, so that
- X * future comparisons with it can be made quickly
- X */
- X
- X if ( ! didsort )
- X bubble( sns, numstates );
- X
- X for ( i = 1; i <= numstates; ++i )
- X dss[newds][i] = sns[i];
- X
- X dfasiz[newds] = numstates;
- X dhash[newds] = hashval;
- X
- X if ( nacc == 0 )
- X {
- X if ( reject )
- X dfaacc[newds].dfaacc_set = (int *) 0;
- X else
- X dfaacc[newds].dfaacc_state = 0;
- X
- X accsiz[newds] = 0;
- X }
- X
- X else if ( reject )
- X {
- X /* we sort the accepting set in increasing order so the disambiguating
- X * rule that the first rule listed is considered match in the event of
- X * ties will work. We use a bubble sort since the list is probably
- X * quite small.
- X */
- X
- X bubble( accset, nacc );
- X
- X dfaacc[newds].dfaacc_set =
- X (int *) malloc( (unsigned) ((nacc + 1) * sizeof( int )) );
- X
- X if ( ! dfaacc[newds].dfaacc_set )
- X flexfatal( "dynamic memory failure in snstods()" );
- X
- X /* save the accepting set for later */
- X for ( i = 1; i <= nacc; ++i )
- X dfaacc[newds].dfaacc_set[i] = accset[i];
- X
- X accsiz[newds] = nacc;
- X }
- X
- X else
- X { /* find lowest numbered rule so the disambiguating rule will work */
- X j = num_rules + 1;
- X
- X for ( i = 1; i <= nacc; ++i )
- X if ( accset[i] < j )
- X j = accset[i];
- X
- X dfaacc[newds].dfaacc_state = j;
- X }
- X
- X *newds_addr = newds;
- X
- X return ( 1 );
- X }
- X
- X
- X/* symfollowset - follow the symbol transitions one step
- X *
- X * synopsis
- X * int ds[current_max_dfa_size], dsize, transsym;
- X * int nset[current_max_dfa_size], numstates;
- X * numstates = symfollowset( ds, dsize, transsym, nset );
- X */
- X
- Xint symfollowset( ds, dsize, transsym, nset )
- Xint ds[], dsize, transsym, nset[];
- X
- X {
- X int ns, tsp, sym, i, j, lenccl, ch, numstates;
- X int ccllist;
- X
- X numstates = 0;
- X
- X for ( i = 1; i <= dsize; ++i )
- X { /* for each nfa state ns in the state set of ds */
- X ns = ds[i];
- X sym = transchar[ns];
- X tsp = trans1[ns];
- X
- X if ( sym < 0 )
- X { /* it's a character class */
- X sym = -sym;
- X ccllist = cclmap[sym];
- X lenccl = ccllen[sym];
- X
- X if ( cclng[sym] )
- X {
- X for ( j = 0; j < lenccl; ++j )
- X { /* loop through negated character class */
- X ch = ccltbl[ccllist + j];
- X
- X if ( ch == 0 )
- X ch = NUL_ec;
- X
- X if ( ch > transsym )
- X break; /* transsym isn't in negated ccl */
- X
- X else if ( ch == transsym )
- X /* next 2 */ goto bottom;
- X }
- X
- X /* didn't find transsym in ccl */
- X nset[++numstates] = tsp;
- X }
- X
- X else
- X for ( j = 0; j < lenccl; ++j )
- X {
- X ch = ccltbl[ccllist + j];
- X
- X if ( ch == 0 )
- X ch = NUL_ec;
- X
- X if ( ch > transsym )
- X break;
- X
- X else if ( ch == transsym )
- X {
- X nset[++numstates] = tsp;
- X break;
- X }
- X }
- X }
- X
- X else if ( sym >= 'A' && sym <= 'Z' && caseins )
- X flexfatal( "consistency check failed in symfollowset" );
- X
- X else if ( sym == SYM_EPSILON )
- X { /* do nothing */
- X }
- X
- X else if ( abs( ecgroup[sym] ) == transsym )
- X nset[++numstates] = tsp;
- X
- Xbottom:
- X ;
- X }
- X
- X return ( numstates );
- X }
- X
- X
- X/* sympartition - partition characters with same out-transitions
- X *
- X * synopsis
- X * integer ds[current_max_dfa_size], numstates, duplist[numecs];
- X * symlist[numecs];
- X * sympartition( ds, numstates, symlist, duplist );
- X */
- X
- Xvoid sympartition( ds, numstates, symlist, duplist )
- Xint ds[], numstates, duplist[];
- Xint symlist[];
- X
- X {
- X int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
- X
- X /* partitioning is done by creating equivalence classes for those
- X * characters which have out-transitions from the given state. Thus
- X * we are really creating equivalence classes of equivalence classes.
- X */
- X
- X for ( i = 1; i <= numecs; ++i )
- X { /* initialize equivalence class list */
- X duplist[i] = i - 1;
- X dupfwd[i] = i + 1;
- X }
- X
- X duplist[1] = NIL;
- X dupfwd[numecs] = NIL;
- X
- X for ( i = 1; i <= numstates; ++i )
- X {
- X ns = ds[i];
- X tch = transchar[ns];
- X
- X if ( tch != SYM_EPSILON )
- X {
- X if ( tch < -lastccl || tch > csize )
- X {
- X if ( tch > csize && tch <= CSIZE )
- X flexerror( "scanner requires -8 flag" );
- X
- X else
- X flexfatal(
- X "bad transition character detected in sympartition()" );
- X }
- X
- X if ( tch >= 0 )
- X { /* character transition */
- X /* abs() needed for fake %t ec's */
- X int ec = abs( ecgroup[tch] );
- X
- X mkechar( ec, dupfwd, duplist );
- X symlist[ec] = 1;
- X }
- X
- X else
- X { /* character class */
- X tch = -tch;
- X
- X lenccl = ccllen[tch];
- X cclp = cclmap[tch];
- X mkeccl( ccltbl + cclp, lenccl, dupfwd, duplist, numecs,
- X NUL_ec );
- X
- X if ( cclng[tch] )
- X {
- X j = 0;
- X
- X for ( k = 0; k < lenccl; ++k )
- X {
- X ich = ccltbl[cclp + k];
- X
- X if ( ich == 0 )
- X ich = NUL_ec;
- X
- X for ( ++j; j < ich; ++j )
- X symlist[j] = 1;
- X }
- X
- X for ( ++j; j <= numecs; ++j )
- X symlist[j] = 1;
- X }
- X
- X else
- X for ( k = 0; k < lenccl; ++k )
- X {
- X ich = ccltbl[cclp + k];
- X
- X if ( ich == 0 )
- X ich = NUL_ec;
- X
- X symlist[ich] = 1;
- X }
- X }
- X }
- X }
- X }
- END_OF_FILE
- if test 26919 -ne `wc -c <'dfa.c'`; then
- echo shar: \"'dfa.c'\" unpacked with wrong size!
- fi
- # end of 'dfa.c'
- fi
- if test -f 'flex.Doc' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'flex.Doc'\"
- else
- echo shar: Extracting \"'flex.Doc'\" \(25372 characters\)
- sed "s/^X//" >'flex.Doc' <<'END_OF_FILE'
- X
- X
- X
- XFLEX(1) USER COMMANDS FLEX(1)
- X
- X
- X
- XNAME
- X flex - fast lexical analyzer generator
- X
- XSYNOPSIS
- X flex [-bcdfinpstvFILT8 -C[efmF] -Sskeleton] [filename ...]
- X
- XDESCRIPTION
- X flex is a tool for generating scanners: programs which
- X recognized lexical patterns in text. flex reads the given
- X input files, or its standard input if no file names are
- X given, for a description of a scanner to generate. The
- X description is in the form of pairs of regular expressions
- X and C code, called rules. flex generates as output a C
- X source file, lex.yy.c, which defines a routine yylex(). This
- X file is compiled and linked with the -lfl library to produce
- X an executable. When the executable is run, it analyzes its
- X input for occurrences of the regular expressions. Whenever
- X it finds one, it executes the corresponding C code.
- X
- X For full documentation, see flexdoc(1). This manual entry is
- X intended for use as a quick reference.
- X
- XOPTIONS
- X flex has the following options:
- X
- X -b Generate backtracking information to lex.backtrack.
- X This is a list of scanner states which require back-
- X tracking and the input characters on which they do so.
- X By adding rules one can remove backtracking states. If
- X all backtracking states are eliminated and -f or -F is
- X used, the generated scanner will run faster.
- X
- X -c is a do-nothing, deprecated option included for POSIX
- X compliance.
- X
- X NOTE: in previous releases of flex -c specified table-
- X compression options. This functionality is now given
- X by the -C flag. To ease the the impact of this change,
- X when flex encounters -c, it currently issues a warning
- X message and assumes that -C was desired instead. In
- X the future this "promotion" of -c to -C will go away in
- X the name of full POSIX compliance (unless the POSIX
- X meaning is removed first).
- X
- X -d makes the generated scanner run in debug mode. When-
- X ever a pattern is recognized and the global
- X yy_flex_debug is non-zero (which is the default), the
- X scanner will write to stderr a line of the form:
- X
- X --accepting rule at line 53 ("the matched text")
- X
- X The line number refers to the location of the rule in
- X
- X
- X
- XVersion 2.3 Last change: 26 May 1990 1
- X
- X
- X
- X
- X
- X
- XFLEX(1) USER COMMANDS FLEX(1)
- X
- X
- X
- X the file defining the scanner (i.e., the file that was
- X fed to flex). Messages are also generated when the
- X scanner backtracks, accepts the default rule, reaches
- X the end of its input buffer (or encounters a NUL; the
- X two look the same as far as the scanner's concerned),
- X or reaches an end-of-file.
- X
- X -f specifies (take your pick) full table or fast scanner.
- X No table compression is done. The result is large but
- X fast. This option is equivalent to -Cf (see below).
- X
- X -i instructs flex to generate a case-insensitive scanner.
- X The case of letters given in the flex input patterns
- X will be ignored, and tokens in the input will be
- X matched regardless of case. The matched text given in
- X yytext will have the preserved case (i.e., it will not
- X be folded).
- X
- X -n is another do-nothing, deprecated option included only
- X for POSIX compliance.
- X
- X -p generates a performance report to stderr. The report
- X consists of comments regarding features of the flex
- X input file which will cause a loss of performance in
- X the resulting scanner.
- X
- X -s causes the default rule (that unmatched scanner input
- X is echoed to stdout) to be suppressed. If the scanner
- X encounters input that does not match any of its rules,
- X it aborts with an error.
- X
- X -t instructs flex to write the scanner it generates to
- X standard output instead of lex.yy.c.
- X
- X -v specifies that flex should write to stderr a summary of
- X statistics regarding the scanner it generates.
- X
- X -F specifies that the fast scanner table representation
- X should be used. This representation is about as fast
- X as the full table representation (-f), and for some
- X sets of patterns will be considerably smaller (and for
- X others, larger). See flexdoc(1) for details.
- X
- X This option is equivalent to -CF (see below).
- X
- X -I instructs flex to generate an interactive scanner, that
- X is, a scanner which stops immediately rather than look-
- X ing ahead if it knows that the currently scanned text
- X cannot be part of a longer rule's match. Again, see
- X flexdoc(1) for details.
- X
- X Note, -I cannot be used in conjunction with full or
- X
- X
- X
- XVersion 2.3 Last change: 26 May 1990 2
- X
- X
- X
- X
- X
- X
- XFLEX(1) USER COMMANDS FLEX(1)
- X
- X
- X
- X fast tables, i.e., the -f, -F, -Cf, or -CF flags.
- X
- X -L instructs flex not to generate #line directives in
- X lex.yy.c. The default is to generate such directives so
- X error messages in the actions will be correctly located
- X with respect to the original flex input file, and not
- X to the fairly meaningless line numbers of lex.yy.c.
- X
- X -T makes flex run in trace mode. It will generate a lot
- X of messages to stdout concerning the form of the input
- X and the resultant non-deterministic and deterministic
- X finite automata. This option is mostly for use in
- X maintaining flex.
- X
- X -8 instructs flex to generate an 8-bit scanner. On some
- X sites, this is the default. On others, the default is
- X 7-bit characters. To see which is the case, check the
- X verbose (-v) output for "equivalence classes created".
- X If the denominator of the number shown is 128, then by
- X default flex is generating 7-bit characters. If it is
- X 256, then the default is 8-bit characters.
- X
- X -C[efmF]
- X controls the degree of table compression.
- X
- X -Ce directs flex to construct equivalence classes,
- X i.e., sets of characters which have identical lexical
- X properties. Equivalence classes usually give dramatic
- X reductions in the final table/object file sizes (typi-
- X cally a factor of 2-5) and are pretty cheap
- X performance-wise (one array look-up per character
- X scanned).
- X
- X -Cf specifies that the full scanner tables should be
- X generated - flex should not compress the tables by tak-
- X ing advantages of similar transition functions for dif-
- X ferent states.
- X
- X -CF specifies that the alternate fast scanner represen-
- X tation (described in flexdoc(1)) should be used.
- X
- X -Cm directs flex to construct meta-equivalence classes,
- X which are sets of equivalence classes (or characters,
- X if equivalence classes are not being used) that are
- X commonly used together. Meta-equivalence classes are
- X often a big win when using compressed tables, but they
- X have a moderate performance impact (one or two "if"
- X tests and one array look-up per character scanned).
- X
- X A lone -C specifies that the scanner tables should be
- X compressed but neither equivalence classes nor meta-
- X equivalence classes should be used.
- X
- X
- X
- XVersion 2.3 Last change: 26 May 1990 3
- X
- X
- X
- X
- X
- X
- XFLEX(1) USER COMMANDS FLEX(1)
- X
- X
- X
- X The options -Cf or -CF and -Cm do not make sense
- X together - there is no opportunity for meta-equivalence
- X classes if the table is not being compressed. Other-
- X wise the options may be freely mixed.
- X
- X The default setting is -Cem, which specifies that flex
- X should generate equivalence classes and meta-
- X equivalence classes. This setting provides the highest
- X degree of table compression. You can trade off
- X faster-executing scanners at the cost of larger tables
- X with the following generally being true:
- X
- X slowest & smallest
- X -Cem
- X -Cm
- X -Ce
- X -C
- X -C{f,F}e
- X -C{f,F}
- X fastest & largest
- X
- X
- X -C options are not cumulative; whenever the flag is
- X encountered, the previous -C settings are forgotten.
- X
- X -Sskeleton_file
- X overrides the default skeleton file from which flex
- X constructs its scanners. You'll never need this option
- X unless you are doing flex maintenance or development.
- X
- XSUMMARY OF FLEX REGULAR EXPRESSIONS
- X The patterns in the input are written using an extended set
- X of regular expressions. These are:
- X
- X x match the character 'x'
- X . any character except newline
- X [xyz] a "character class"; in this case, the pattern
- X matches either an 'x', a 'y', or a 'z'
- X [abj-oZ] a "character class" with a range in it; matches
- X an 'a', a 'b', any letter from 'j' through 'o',
- X or a 'Z'
- X [^A-Z] a "negated character class", i.e., any character
- X but those in the class. In this case, any
- X character EXCEPT an uppercase letter.
- X [^A-Z\n] any character EXCEPT an uppercase letter or
- X a newline
- X r* zero or more r's, where r is any regular expression
- X r+ one or more r's
- X r? zero or one r's (that is, "an optional r")
- X r{2,5} anywhere from two to five r's
- X r{2,} two or more r's
- X r{4} exactly 4 r's
- X
- X
- X
- XVersion 2.3 Last change: 26 May 1990 4
- X
- X
- X
- X
- X
- X
- XFLEX(1) USER COMMANDS FLEX(1)
- X
- X
- X
- X {name} the expansion of the "name" definition
- X (see above)
- X "[xyz]\"foo"
- X the literal string: [xyz]"foo
- X \X if X is an 'a', 'b', 'f', 'n', 'r', 't', or 'v',
- X then the ANSI-C interpretation of \x.
- X Otherwise, a literal 'X' (used to escape
- X operators such as '*')
- X \123 the character with octal value 123
- X \x2a the character with hexadecimal value 2a
- X (r) match an r; parentheses are used to override
- X precedence (see below)
- X
- X
- X rs the regular expression r followed by the
- X regular expression s; called "concatenation"
- X
- X
- X r|s either an r or an s
- X
- X
- X r/s an r but only if it is followed by an s. The
- X s is not part of the matched text. This type
- X of pattern is called as "trailing context".
- X ^r an r, but only at the beginning of a line
- X r$ an r, but only at the end of a line. Equivalent
- X to "r/\n".
- X
- X
- X <s>r an r, but only in start condition s (see
- X below for discussion of start conditions)
- X <s1,s2,s3>r
- X same, but in any of start conditions s1,
- X s2, or s3
- X
- X
- X <<EOF>> an end-of-file
- X <s1,s2><<EOF>>
- X an end-of-file when in start condition s1 or s2
- X
- X The regular expressions listed above are grouped according
- X to precedence, from highest precedence at the top to lowest
- X at the bottom. Those grouped together have equal pre-
- X cedence.
- X
- X Some notes on patterns:
- X
- X - Negated character classes match newlines unless "\n"
- X (or an equivalent escape sequence) is one of the char-
- X acters explicitly present in the negated character
- X class (e.g., "[^A-Z\n]").
- X
- X
- X
- X
- XVersion 2.3 Last change: 26 May 1990 5
- X
- X
- X
- X
- X
- X
- XFLEX(1) USER COMMANDS FLEX(1)
- X
- X
- X
- X - A rule can have at most one instance of trailing con-
- X text (the '/' operator or the '$' operator). The start
- X condition, '^', and "<<EOF>>" patterns can only occur
- X at the beginning of a pattern, and, as well as with '/'
- X and '$', cannot be grouped inside parentheses. The
- X following are all illegal:
- X
- X foo/bar$
- X foo|(bar$)
- X foo|^bar
- X <sc1>foo<sc2>bar
- X
- X
- XSUMMARY OF SPECIAL ACTIONS
- X In addition to arbitrary C code, the following can appear in
- X actions:
- X
- X - ECHO copies yytext to the scanner's output.
- X
- X - BEGIN followed by the name of a start condition places
- X the scanner in the corresponding start condition.
- X
- X - REJECT directs the scanner to proceed on to the "second
- X best" rule which matched the input (or a prefix of the
- X input). yytext and yyleng are set up appropriately.
- X Note that REJECT is a particularly expensive feature in
- X terms scanner performance; if it is used in any of the
- X scanner's actions it will slow down all of the
- X scanner's matching. Furthermore, REJECT cannot be used
- X with the -f or -F options.
- X
- X Note also that unlike the other special actions, REJECT
- X is a branch; code immediately following it in the
- X action will not be executed.
- X
- X - yymore() tells the scanner that the next time it
- X matches a rule, the corresponding token should be
- X appended onto the current value of yytext rather than
- X replacing it.
- X
- X - yyless(n) returns all but the first n characters of the
- X current token back to the input stream, where they will
- X be rescanned when the scanner looks for the next match.
- X yytext and yyleng are adjusted appropriately (e.g.,
- X yyleng will now be equal to n ).
- X
- X - unput(c) puts the character c back onto the input
- X stream. It will be the next character scanned.
- X
- X - input() reads the next character from the input stream
- X (this routine is called yyinput() if the scanner is
- X compiled using C++).
- X
- X
- X
- XVersion 2.3 Last change: 26 May 1990 6
- X
- X
- X
- X
- X
- X
- XFLEX(1) USER COMMANDS FLEX(1)
- X
- X
- X
- X - yyterminate() can be used in lieu of a return statement
- X in an action. It terminates the scanner and returns a
- X 0 to the scanner's caller, indicating "all done".
- X
- X By default, yyterminate() is also called when an end-
- X of-file is encountered. It is a macro and may be rede-
- X fined.
- X
- X - YY_NEW_FILE is an action available only in <<EOF>>
- X rules. It means "Okay, I've set up a new input file,
- X continue scanning".
- X
- X - yy_create_buffer( file, size ) takes a FILE pointer and
- X an integer size. It returns a YY_BUFFER_STATE handle to
- X a new input buffer large enough to accomodate size
- X characters and associated with the given file. When in
- X doubt, use YY_BUF_SIZE for the size.
- X
- X - yy_switch_to_buffer( new_buffer ) switches the
- X scanner's processing to scan for tokens from the given
- X buffer, which must be a YY_BUFFER_STATE.
- X
- X - yy_delete_buffer( buffer ) deletes the given buffer.
- X
- XVALUES AVAILABLE TO THE USER
- X - char *yytext holds the text of the current token. It
- X may not be modified.
- X
- X - int yyleng holds the length of the current token. It
- X may not be modified.
- X
- X - FILE *yyin is the file which by default flex reads
- X from. It may be redefined but doing so only makes
- X sense before scanning begins. Changing it in the mid-
- X dle of scanning will have unexpected results since flex
- X buffers its input. Once scanning terminates because an
- X end-of-file has been seen, void yyrestart( FILE
- X *new_file ) may be called to point yyin at the new
- X input file.
- X
- X - FILE *yyout is the file to which ECHO actions are done.
- X It can be reassigned by the user.
- X
- X - YY_CURRENT_BUFFER returns a YY_BUFFER_STATE handle to
- X the current buffer.
- X
- XMACROS THE USER CAN REDEFINE
- X - YY_DECL controls how the scanning routine is declared.
- X By default, it is "int yylex()", or, if prototypes are
- X being used, "int yylex(void)". This definition may be
- X changed by redefining the "YY_DECL" macro. Note that
- X if you give arguments to the scanning routine using a
- X
- X
- X
- XVersion 2.3 Last change: 26 May 1990 7
- X
- X
- X
- X
- X
- X
- XFLEX(1) USER COMMANDS FLEX(1)
- X
- X
- X
- X K&R-style/non-prototyped function declaration, you must
- X terminate the definition with a semi-colon (;).
- X
- X - The nature of how the scanner gets its input can be
- X controlled by redefining the YY_INPUT macro.
- X YY_INPUT's calling sequence is
- X "YY_INPUT(buf,result,max_size)". Its action is to
- X place up to max_size characters in the character array
- X buf and return in the integer variable result either
- X the number of characters read or the constant YY_NULL
- X (0 on Unix systems) to indicate EOF. The default
- X YY_INPUT reads from the global file-pointer "yyin". A
- X sample redefinition of YY_INPUT (in the definitions
- X section of the input file):
- X
- X %{
- X #undef YY_INPUT
- X #define YY_INPUT(buf,result,max_size) \
- X { \
- X int c = getchar(); \
- X result = (c == EOF) ? YY_NULL : (buf[0] = c, 1); \
- X }
- X %}
- X
- X
- X - When the scanner receives an end-of-file indication
- X from YY_INPUT, it then checks the yywrap() function.
- X If yywrap() returns false (zero), then it is assumed
- X that the function has gone ahead and set up yyin to
- X point to another input file, and scanning continues.
- X If it returns true (non-zero), then the scanner ter-
- X minates, returning 0 to its caller.
- X
- X The default yywrap() always returns 1. Presently, to
- X redefine it you must first "#undef yywrap", as it is
- X currently implemented as a macro. It is likely that
- X yywrap() will soon be defined to be a function rather
- X than a macro.
- X
- X - YY_USER_ACTION can be redefined to provide an action
- X which is always executed prior to the matched rule's
- X action.
- X
- X - The macro YY_USER_INIT may be redefined to provide an
- X action which is always executed before the first scan.
- X
- X - In the generated scanner, the actions are all gathered
- X in one large switch statement and separated using
- X YY_BREAK, which may be redefined. By default, it is
- X simply a "break", to separate each rule's action from
- X the following rule's.
- X
- X
- X
- X
- XVersion 2.3 Last change: 26 May 1990 8
- X
- X
- X
- X
- X
- X
- XFLEX(1) USER COMMANDS FLEX(1)
- X
- X
- X
- XFILES
- X flex.skel
- X skeleton scanner.
- X
- X lex.yy.c
- X generated scanner (called lexyy.c on some systems).
- X
- X lex.backtrack
- X backtracking information for -b flag (called lex.bck on
- X some systems).
- X
- X -lfl library with which to link the scanners.
- X
- XSEE ALSO
- X flexdoc(1), lex(1), yacc(1), sed(1), awk(1).
- X
- X M. E. Lesk and E. Schmidt, LEX - Lexical Analyzer Generator
- X
- XDIAGNOSTICS
- X reject_used_but_not_detected undefined or
- X
- X yymore_used_but_not_detected undefined - These errors can
- X occur at compile time. They indicate that the scanner uses
- X REJECT or yymore() but that flex failed to notice the fact,
- X meaning that flex scanned the first two sections looking for
- X occurrences of these actions and failed to find any, but
- X somehow you snuck some in (via a #include file, for exam-
- X ple). Make an explicit reference to the action in your flex
- X input file. (Note that previously flex supported a
- X %used/%unused mechanism for dealing with this problem; this
- X feature is still supported but now deprecated, and will go
- X away soon unless the author hears from people who can argue
- X compellingly that they need it.)
- X
- X flex scanner jammed - a scanner compiled with -s has encoun-
- X tered an input string which wasn't matched by any of its
- X rules.
- X
- X flex input buffer overflowed - a scanner rule matched a
- X string long enough to overflow the scanner's internal input
- X buffer (16K bytes - controlled by YY_BUF_MAX in
- X "flex.skel").
- X
- X scanner requires -8 flag - Your scanner specification
- X includes recognizing 8-bit characters and you did not
- X specify the -8 flag (and your site has not installed flex
- X with -8 as the default).
- X
- X fatal flex scanner internal error--end of buffer missed -
- X This can occur in an scanner which is reentered after a
- X long-jump has jumped out (or over) the scanner's activation
- X frame. Before reentering the scanner, use:
- X
- X
- X
- XVersion 2.3 Last change: 26 May 1990 9
- X
- X
- X
- X
- X
- X
- XFLEX(1) USER COMMANDS FLEX(1)
- X
- X
- X
- X yyrestart( yyin );
- X
- X
- X too many %t classes! - You managed to put every single char-
- X acter into its own %t class. flex requires that at least
- X one of the classes share characters.
- X
- XAUTHOR
- X Vern Paxson, with the help of many ideas and much inspira-
- X tion from Van Jacobson. Original version by Jef Poskanzer.
- X
- X See flexdoc(1) for additional credits and the address to
- X send comments to.
- X
- XDEFICIENCIES / BUGS
- X Some trailing context patterns cannot be properly matched
- X and generate warning messages ("Dangerous trailing con-
- X text"). These are patterns where the ending of the first
- X part of the rule matches the beginning of the second part,
- X such as "zx*/xy*", where the 'x*' matches the 'x' at the
- X beginning of the trailing context. (Note that the POSIX
- X draft states that the text matched by such patterns is unde-
- X fined.)
- X
- X For some trailing context rules, parts which are actually
- X fixed-length are not recognized as such, leading to the
- X abovementioned performance loss. In particular, parts using
- X '|' or {n} (such as "foo{3}") are always considered
- X variable-length.
- X
- X Combining trailing context with the special '|' action can
- X result in fixed trailing context being turned into the more
- X expensive variable trailing context. For example, this hap-
- X pens in the following example:
- X
- X %%
- X abc |
- X xyz/def
- X
- X
- X Use of unput() invalidates yytext and yyleng.
- X
- X Use of unput() to push back more text than was matched can
- X result in the pushed-back text matching a beginning-of-line
- X ('^') rule even though it didn't come at the beginning of
- X the line (though this is rare!).
- X
- X Pattern-matching of NUL's is substantially slower than
- X matching other characters.
- X
- X flex does not generate correct #line directives for code
- X internal to the scanner; thus, bugs in flex.skel yield bogus
- X
- X
- X
- XVersion 2.3 Last change: 26 May 1990 10
- X
- X
- X
- X
- X
- X
- XFLEX(1) USER COMMANDS FLEX(1)
- X
- X
- X
- X line numbers.
- X
- X Due to both buffering of input and read-ahead, you cannot
- X intermix calls to <stdio.h> routines, such as, for example,
- X getchar(), with flex rules and expect it to work. Call
- X input() instead.
- X
- X The total table entries listed by the -v flag excludes the
- X number of table entries needed to determine what rule has
- X been matched. The number of entries is equal to the number
- X of DFA states if the scanner does not use REJECT, and some-
- X what greater than the number of states if it does.
- X
- X REJECT cannot be used with the -f or -F options.
- X
- X Some of the macros, such as yywrap(), may in the future
- X become functions which live in the -lfl library. This will
- X doubtless break a lot of code, but may be required for
- X POSIX-compliance.
- X
- X The flex internal algorithms need documentation.
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- XVersion 2.3 Last change: 26 May 1990 11
- X
- X
- X
- END_OF_FILE
- if test 25372 -ne `wc -c <'flex.Doc'`; then
- echo shar: \"'flex.Doc'\" unpacked with wrong size!
- fi
- # end of 'flex.Doc'
- fi
- echo shar: End of archive 5 \(of 13\).
- cp /dev/null ark5isdone
- MISSING=""
- for I in 1 2 3 4 5 6 7 8 9 10 11 12 13 ; do
- if test ! -f ark${I}isdone ; then
- MISSING="${MISSING} ${I}"
- fi
- done
- if test "${MISSING}" = "" ; then
- echo You have unpacked all 13 archives.
- rm -f ark[1-9]isdone ark[1-9][0-9]isdone
- else
- echo You still need to unpack the following archives:
- echo " " ${MISSING}
- fi
- ## End of shell archive.
- exit 0
- --
- Mail submissions (sources or binaries) to <amiga@uunet.uu.net>.
- Mail comments to the moderator at <amiga-request@uunet.uu.net>.
- Post requests for sources, and general discussion to comp.sys.amiga.
-